************************************************************************************************
* Name			: prepare estimation samples.do
*
* Description	: This dofile selects the sample, standardizes skill and investment measures, 
* 				  and creates the bootstrapped samples for estimation. 	
************************************************************************************************

********************************************************************************
* SETTINGS 
********************************************************************************
* Start from a clean session and make the run non-interactive
clear all
set more off                /* never pause the output */
set graphics off            /* do not display graphs */
version 15                  /*Use Stata 15*/
set seed 16072015           /* This is the seed number that was used to create the bootstrapped samples for estimation of the model in the paper */


* Options for data to create (each takes the value yes or no)
global testerFE 	no 			/*Take out tester FE before standardizing measures*/
global invar 		no 			/*Incorporate intercepts in the measurement system for when we will allow for measurement system variance. In this file: no = standardize using treat==1 only, yes = standardize using the full sample*/
global pure 		no 			/*Only pure control and pure stimulation groups*/


* Globals of directories, variable groups and others.
* NOTE: $dir_statacode is not set anywhere above, so it must already be defined
* when this do-file is launched.
* Forward slashes are used in paths so that the file runs on any operating system
* and matches the other paths in this file.
run "$dir_statacode/globals.do"

************************************************************************************************
* Select sample
************************************************************************************************
* Open dataset with non-standardized skill and investment measures, and instruments
use "$dir_data/dataset.dta", clear

* Take out three observations that are outliers on the Bayley.
* The flag variable stays in the data (it equals 0 for every remaining observation).
gen outlier_cog = inlist(llaveper, 588702102, 1529903302, 6846400602)
drop if outlier_cog == 1

* Define two treatment groups (instead of four):
*   treat = 1 for group0 in {1, 4}, treat = 2 for group0 in {2, 3};
*   treat is missing for any other value of group0.
gen treat = 1 if inlist(group0, 1, 4)
replace treat = 2 if inlist(group0, 2, 3)

* Drop the nutrition only and nutrition + stimulation groups in the case we're running the
* estimation on the "pure samples"
if ("$pure" == "yes") {
    drop if inlist(group0, 2, 4)
}

* Define region dummies (a missing region yields 0 in both dummies)
gen oriental = (region == 1)
gen central  = (region == 2)

* Save dataset before standardizing the measures
save "$dir_data/finalsample_nonstandardized.dta", replace

************************************************************************************************
* If option testerFE = yes, take out tester FE
************************************************************************************************
* Each measure is regressed on a full set of dummies for the relevant fixed-effect
* variable and then overwritten by the residual of that regression.
* The measures are grouped by the fixed-effect variable they are purged of:
*   ent_bl / eva_bl : used for the baseline measures (suffix 0)
*   ent_fu / eva_fu : used for the follow-up measures (suffix 1, plus raventot)
if ("$testerFE" == "yes") {

    local fe_ent_bl mac_words0 mac_unders0                                         ///
        bates_unsociable0 bates_difficult0 bates_unadaptable0 bates_unstoppable0   ///
        edu_yrs_mo0                                                                ///
        cesdA0 cesdB0 cesdC0 cesdD0 cesdE0 cesdF0 cesdG0 cesdH0 cesdI0 cesdJ0      ///
        adult_books0 adult_mags0

    local fe_eva_bl b_tot_cog0 b_tot_lr0 b_tot_le0 b_tot_mf0 b_tot_mg0

    * Ntoysmade1 used to appear twice in this list, which residualised it twice;
    * it is now listed once.
    local fe_ent_fu mac_words1 mac_phrases1                                        ///
        fci_play_mat_type1 Npicbooks1 Npaintbooks1 Ntoysmade1 Ntoysbought1         ///
        Nthingsmove1 Ntoysshape1 Ndolls1                                           ///
        NtoysgivenML1 NtoysmadeML1 Npicbooksi1                                     ///
        fci_play_act1 home_stories1 home_read1 home_outside1 home_toys1 home_name1 ///
        bates_unsociable1 bates_difficult1 bates_unadaptable1 bates_unstoppable1   ///
        roth_attention1 roth_inhibit1 roth_sociable1                               ///
        peabody_mo1                                                                ///
        cesdA1 cesdB1 cesdC1 cesdD1 cesdE1 cesdF1 cesdG1 cesdH1 cesdI1 cesdJ1      ///
        raventot

    local fe_eva_fu b_tot_cog1 b_tot_lr1 b_tot_le1 b_tot_mf1 b_tot_mg1

    foreach fe in ent_bl eva_bl ent_fu eva_fu {
        foreach var of local fe_`fe' {
            * A temporary variable holds the residual, so no fixed scratch name
            * can clash with a variable already in the data.
            tempvar resid
            reg `var' i.`fe'
            predict `resid', resid
            replace `var' = `resid'
            drop `resid'
        }
    }
}

************************************************************************************************
* Standardize the measures non-parametrically for age (child's age for the child measures,
* mother's age for the mother measures)
************************************************************************************************
* Helper program
*   std_by_age varname, agevar(varname) [fitif(exp) scaleif(exp)]
*
* Creates <varname>_st = (varname - m(age)) / sqrt(v(age)), where
*   m(age) is the lpoly smooth of varname on agevar, estimated on the observations
*          satisfying fitif;
*   v(age) is the lpoly smooth of the squared residual (varname - m(age))^2 on agevar,
*          estimated on the observations satisfying scaleif.
* Both smooths are evaluated at every observation's own value of agevar (option at()).
* An omitted fitif / scaleif means "use all observations".
* Any existing <varname>_st is replaced.
*
* Intermediate results live in temporary variables, so the program neither depends on
* nor leaves behind scratch variables (previously r, r2, mean, var and std stayed in the
* data and ended up in the saved dataset).
* The intermediate steps are stored one by one, as before, so that rounding is unchanged.
* nograph: the smooths are only needed as variables; no graph is built.
capture program drop std_by_age
program define std_by_age
    version 15
    syntax varname, AGEvar(varname) [FITif(string) SCALEif(string)]

    if ("`fitif'" == "")   local fitif 1
    if ("`scaleif'" == "") local scaleif 1

    tempvar mu res res2 s2 sd
    capture drop `varlist'_st

    * Conditional mean given age
    lpoly `varlist' `agevar' if `fitif', at(`agevar') generate(`mu') nograph
    generate `res'  = `varlist' - `mu'
    generate `res2' = `res'^2

    * Conditional variance given age
    lpoly `res2' `agevar' if `scaleif', at(`agevar') generate(`s2') nograph
    generate `sd' = sqrt(`s2')

    generate `varlist'_st = (`varlist' - `mu') / `sd'
end

* $invar == "no"  : mean and variance functions are estimated on treat==1 only
* $invar == "yes" : mean and variance functions are estimated on the full sample
* Any other value of $invar: nothing is standardized here.
if inlist("$invar", "no", "yes") {

    local ref "1"
    if ("$invar" == "no") local ref "treat==1"

    * Child measures at baseline, by child's age at baseline
    foreach y of global child_tostd0 {
        std_by_age `y', agevar(b_age_mth0) fitif(`ref') scaleif(`ref')
    }

    * Child measures at follow-up, by child's age at follow-up
    foreach y of global child_tostd1 {
        local scale "`ref'"
        * With $invar == "no", the variance function of these three measures is
        * estimated on treat==2 instead of treat==1 (the mean function still uses
        * treat==1).
        * NOTE(review): presumably because these items hardly vary within
        * treat==1 -- confirm.
        if ("$invar" == "no" & inlist("`y'", "NtoysgivenML1", "NtoysmadeML1", "Npicbooksi1")) {
            local scale "treat==2"
        }
        std_by_age `y', agevar(b_age_mth1) fitif(`ref') scaleif(`scale')
    }

    * Mother measures at baseline, by mother's age at baseline
    foreach y of global mother_tostd0 {
        std_by_age `y', agevar(age_mo0) fitif(`ref') scaleif(`ref')
    }

    * Mother measures at follow-up, by mother's age at follow-up
    foreach y of global mother_tostd1 {
        std_by_age `y', agevar(age_mo1) fitif(`ref') scaleif(`ref')
    }
}

************************************************************************************************
* Take the logs of some of the X's
************************************************************************************************
* Natural logarithm of nkids0 and nkids1, stored as ln_nkids0 and ln_nkids1
* (missing wherever the original value is zero, negative or missing)
generate ln_nkids0 = ln(nkids0)
generate ln_nkids1 = ln(nkids1)

************************************************************************************************
* Standardize all the other measures (the variables listed in $X): subtract the mean and
* divide by the standard deviation of the reference sample, so that each <v>_st has
* mean 0 and sd 1 in that sample
************************************************************************************************
* Reference sample:
*   $invar == "no"  : observations with treat==1
*   $invar == "yes" : all observations
* Any other value of $invar: nothing is standardized here.
if inlist("$invar", "no", "yes") {

    local refsample ""
    if ("$invar" == "no") local refsample "if treat==1"

    foreach v of global X {
        tempvar sd_ref sd_all mean_ref mean_all

        * Statistic computed on the reference sample (missing outside it) ...
        egen `sd_ref'   = sd(`v') `refsample'
        egen `mean_ref' = mean(`v') `refsample'

        * ... then spread to every observation by taking the maximum of that constant
        egen `sd_all'   = max(`sd_ref')
        egen `mean_all' = max(`mean_ref')

        gen `v'_st = (`v' - `mean_all') / `sd_all'

        drop `sd_ref' `sd_all' `mean_ref' `mean_all'
    }
}

************************************************************************************************
* Save dataset with standardized measures and instruments in Stata format and CSV (to import
* into R)
************************************************************************************************
* Fix the row order before exporting
sort treat llaveper cod_dane

* Output locations
local csv_out "$dir_data/measures.csv"
local dta_out "$dir_data/measures.dta"
local efa_out "$dir_mplus/measures_for_efa"

* The CSV contains only the variables listed in $measures ...
outsheet $measures using "`csv_out'", replace comma

* ... whereas the Stata file contains every variable currently in memory
save "`dta_out'", replace

************************************************************************************************
* Save dataset with standardized measures into Mplus format to perform the EFA
************************************************************************************************
stata2mplus $measures using "`efa_out'", replace


************************************************************************************************
* Generate bootstrap samples with replacement at the cluster level (cod_dane)
************************************************************************************************
* Only the variables listed in $measures are written to the bootstrap files
keep $measures

* Number of clusters drawn in each bootstrap sample.
* NOTE(review): 96 / 48 presumably equal the number of cod_dane clusters in the full /
* "pure" sample -- confirm if the sample definition changes.
local clus 96
if ("$pure" == "yes") local clus 48

* Number of bootstrap samples to create
local nboot 1000

* Each replication draws `clus' clusters with replacement from the data in memory,
* writes the draw to measures_b<i>.csv and then restores the original data.
* The draws depend on the seed set at the top of this file.
forvalues i = 1/`nboot' {
    preserve
    bsample `clus', cluster(cod_dane)
    sort treat llaveper cod_dane
    * Forward slash in the path: portable, and consistent with measures.csv above
    outsheet using "$dir_data/measures_b`i'.csv", replace comma
    restore
}
 
